Leveraging Social Media Sentiment Analysis for Real-Time Agricultural Market Trend Forecasting

In [13]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import nltk, re, warnings
# Suppress every warning for the rest of the session. This keeps cell output
# clean but also hides deprecation warnings from pandas/sklearn/plotly.
warnings.filterwarnings('ignore')
# Fetch NLTK's VADER lexicon resource (cached after the first run).
# NOTE(review): no VADER analyzer is used in the cells visible here — confirm
# whether this download is still required.
nltk.download('vader_lexicon')

# Load dataset
import os
from pathlib import Path

# The CSV location can be overridden with the AGRI_DATA_PATH environment
# variable so the notebook runs on other machines; the fallback is the
# original author's local path.
DATA_PATH = Path(os.environ.get(
    "AGRI_DATA_PATH",
    r"C:\Users\malot\OneDrive\Desktop\Big data\Agri_BigData_3000.csv",
))
df = pd.read_csv(DATA_PATH)

# errors='coerce' turns unparseable dates into NaT instead of raising, so
# report how many rows were affected rather than hiding them silently.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
n_bad_dates = int(df['Date'].isna().sum())
if n_bad_dates:
    print(f"⚠️ {n_bad_dates} rows have a missing or unparseable Date (stored as NaT)")

print("✅ Dataset Loaded Successfully with", df.shape[0], "rows")
df.head()
✅ Dataset Loaded Successfully with 3000 rows
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\malot\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
Out[13]:
Region Crop Emotion Post_Text Date Sentiment_Score Price Demand_Index Temperature Rainfall
0 Gujarat Wheat Positive wilt 2025-03-04 0 4562.48 136 34.81 10.91
1 Maharashtra Pulses Positive pest attack 2025-01-24 1 4731.54 63 31.22 14.32
2 Gujarat Pulses Happy borer 2025-05-31 1 5221.20 157 36.12 13.96
3 Telangana Sugarcane Neutral blight 2025-03-28 0 3715.47 95 36.95 12.07
4 Punjab Rice Angry healthy crop 2025-02-01 -1 9636.10 110 25.86 12.57

USE CASE 1 — Early Warning System for Pest & Disease Outbreaks

In [15]:
# Step 1: Flag posts whose text mentions a pest/disease keyword
pest_keywords = [
    'pest', 'attack', 'borer', 'blight',
    'wilt', 'fungus', 'disease', 'infestation',
]


def detect_pest_risk(text):
    """Return 1 if ``text`` contains any pest/disease keyword, else 0.

    The input is converted with ``str()`` and lower-cased, so non-string
    values (e.g. ``None`` or NaN) are accepted. Matching is by substring,
    so ``'pest'`` also matches ``'pesticide'``.
    """
    lowered = str(text).lower()
    for keyword in pest_keywords:
        if keyword in lowered:
            return 1
    return 0

# 1/0 flag per post: does the text mention a pest/disease keyword?
df['Pest_Alert'] = df['Post_Text'].map(detect_pest_risk)

# Step 2: Create Risk Score using Sentiment & Pest Mentions
# (Sentiment_Score + 1) / 2 rescales the score; subtracting it from 1 makes
# more negative sentiment yield a larger factor. Posts without a pest mention
# (Pest_Alert == 0) always score 0.
negativity = 1 - ((df['Sentiment_Score'] + 1) / 2)
df['Risk_Score'] = df['Pest_Alert'] * negativity * 100

# Step 3: Categorize Alerts
def risk_label(score):
    """Map a numeric risk score to 'High' (>= 60), 'Moderate' (>= 30) or 'Low'.

    Any value that satisfies neither threshold comparison (including NaN)
    falls through to 'Low'.
    """
    for threshold, label in ((60, 'High'), (30, 'Moderate')):
        if score >= threshold:
            return label
    return 'Low'

# Label every post with its alert category.
df['Alert_Level'] = df['Risk_Score'].map(risk_label)

# Step 4: Visualization
# Mean risk score per region, as a two-column frame for plotting.
region_risk = df.groupby('Region')['Risk_Score'].mean().reset_index()
fig = px.bar(
    region_risk,
    x='Region',
    y='Risk_Score',
    color='Risk_Score',
    color_continuous_scale='Reds',
    title='🌾 Average Pest/Disease Risk by Region',
)
fig.show()

# Step 5: WordCloud for high-risk posts
# Drop missing posts and coerce the rest to str so " ".join never receives a
# float NaN (which would raise TypeError).
high_risk_text = df.loc[df['Alert_Level'] == "High", 'Post_Text'].dropna().astype(str)
pest_text = " ".join(high_risk_text)

if pest_text.strip():
    wc = WordCloud(width=900, height=400, background_color='white').generate(pest_text)
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.title("Pest/Disease Mentions — High Risk Posts")
    plt.show()
else:
    # WordCloud.generate raises ValueError when given no words, so skip the
    # plot instead of crashing the notebook when nothing is high-risk.
    print("No high-risk posts found — skipping word cloud.")
No description has been provided for this image

USE CASE 2 — Real-Time Commodity Price Nowcasting

In [19]:
# Predict Price from sentiment, demand and weather with a linear model.
features = ['Sentiment_Score', 'Demand_Index', 'Temperature', 'Rainfall']
X = df[features]
y = df['Price']

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
lr = LinearRegression().fit(X_train, y_train)

# Predictions for every row (train and test alike) — used only for the plot below.
df['Predicted_Price'] = lr.predict(X)

# Score on the held-out rows only; predict once and reuse the result.
y_test_pred = lr.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
r2 = r2_score(y_test, y_test_pred)
print(f"📊 Price Nowcast - RMSE: {round(rmse,2)} | R²: {round(r2,3)}")

fig = px.scatter(df, x='Date', y='Price', color='Crop',
                 title='Actual vs Predicted Commodity Prices',
                 hover_data=['Region'])

# px.line connects points in row order, so the frame must be sorted by Date or
# the predicted line zig-zags back and forth in time. category_orders pins the
# crop order to first appearance in df so each crop keeps the same colour as
# its scatter trace after sorting.
crop_order = df['Crop'].dropna().unique().tolist()
pred_fig = px.line(df.sort_values('Date'), x='Date', y='Predicted_Price', color='Crop',
                   category_orders={'Crop': crop_order})
# Distinguish predicted traces from the actual-price traces in the legend.
pred_fig.for_each_trace(lambda trace: trace.update(name=f"{trace.name} (predicted)"))
fig.add_traces(pred_fig.data)
fig.show()
📊 Price Nowcast - RMSE: 2073.63 | R²: 0.012

USE CASE 3 — Farmer Sentiment Monitoring

In [22]:
# Step 1: Sentiment Labeling
def sentiment_label(x):
    """Classify a sentiment score by sign: 'Positive', 'Negative' or 'Neutral'.

    Zero — and any value that is neither greater nor less than zero, such as
    NaN — is labelled 'Neutral'.
    """
    if x > 0:
        return 'Positive'
    return 'Negative' if x < 0 else 'Neutral'

# Attach the categorical label derived from the numeric score.
df['Sentiment_Label'] = df['Sentiment_Score'].map(sentiment_label)

# Step 2: Visualization - Sentiment Distribution
fig = px.histogram(
    df,
    x='Sentiment_Label',
    color='Region',
    barmode='group',
    title="Farmer Sentiment Distribution by Region",
)
fig.show()

# Step 3: Sentiment Trend Over Time
# Bucket posts by calendar month, then average the score within each month.
month_of_post = df['Date'].dt.to_period('M')
sent_trend = df.groupby(month_of_post)['Sentiment_Score'].mean().reset_index()
# Periods are converted to strings so the month can be used as the x axis.
sent_trend['Date'] = sent_trend['Date'].astype(str)
fig = px.line(sent_trend, x='Date', y='Sentiment_Score', title='Monthly Sentiment Trend')
# Fix the y axis to [-1, 1] so the trend is shown on a constant scale.
fig.update_yaxes(range=[-1, 1])
fig.show()

# Step 4: WordCloud for Emotions
# One token per post: the Emotion value, stringified and space-joined.
emotion_values = df['Emotion'].astype(str)
emotion_text = " ".join(emotion_values)

emotion_cloud = WordCloud(width=900, height=400, background_color='white')
wc = emotion_cloud.generate(emotion_text)

plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.title("Farmer Emotions WordCloud")
plt.show()
No description has been provided for this image

USE CASE 4 — Market Demand Prediction

In [25]:
# Predict Demand_Index from price, sentiment and weather with a random forest.
features_d = ['Price', 'Sentiment_Score', 'Rainfall', 'Temperature']
X = df[features_d]
y = df['Demand_Index']

# 75/25 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
rf = RandomForestRegressor(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)

# Predictions for every row, stored alongside the data.
df['Predicted_Demand'] = rf.predict(X)

# Evaluate on the held-out rows only.
demand_test_pred = rf.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, demand_test_pred))
r2 = r2_score(y_test, demand_test_pred)
print(f"🔮 Demand Prediction - RMSE: {round(rmse,2)} | R²: {round(r2,3)}")

# Feature Importance
# Sorted ascending so the most important feature is drawn at the top of the
# horizontal bar chart.
imp = pd.Series(rf.feature_importances_, index=features_d).sort_values()
fig = px.bar(
    imp,
    x=imp.values,
    y=imp.index,
    orientation='h',
    title='Feature Importance for Market Demand Prediction',
)
fig.show()
🔮 Demand Prediction - RMSE: 31.45 | R²: -0.068

USE CASE 5 — Regional Alert & Recommendation System

In [28]:
# Per-region averages plus a count of high-risk posts.
# `high_risk_posts` counts rows whose Alert_Level is 'High'. Summing Pest_Alert
# instead counted every post that mentions a pest/disease keyword, whatever its
# risk level, which did not match the column's name.
region_summary = (
    df.assign(_is_high_risk=df['Alert_Level'].eq('High'))
      .groupby('Region')
      .agg(
          avg_sentiment=('Sentiment_Score', 'mean'),
          avg_rainfall=('Rainfall', 'mean'),
          avg_temp=('Temperature', 'mean'),
          avg_price=('Price', 'mean'),
          avg_demand=('Demand_Index', 'mean'),
          high_risk_posts=('_is_high_risk', 'sum'),  # sum of booleans == count of True
      )
      .reset_index()
)

def recommendation(row, pest_threshold=5, demand_threshold=120, rainfall_threshold=10):
    """Return the first advisory message that applies to a region summary row.

    Rules are checked in priority order: pest risk, negative sentiment,
    high demand, low rainfall; if none applies the region is "stable".

    Parameters
    ----------
    row : mapping (e.g. a ``pandas.Series`` or ``dict``)
        Must provide ``'high_risk_posts'``, ``'avg_sentiment'``,
        ``'avg_demand'`` and ``'avg_rainfall'``.
    pest_threshold : number, default 5
        Pest alert fires when ``high_risk_posts`` is strictly greater.
        This is an absolute post count; in the recorded run the default
        flagged every region, so raise it to suit the dataset size, e.g.
        ``region_summary.apply(recommendation, axis=1, pest_threshold=...)``.
    demand_threshold : number, default 120
        High-demand advisory fires when ``avg_demand`` is strictly greater.
    rainfall_threshold : number, default 10
        Irrigation advisory fires when ``avg_rainfall`` is strictly lower.

    Returns
    -------
    str
        The advisory message.
    """
    if row['high_risk_posts'] > pest_threshold:
        return "🚨 High pest risk — monitor immediately"
    if row['avg_sentiment'] < 0:
        return "😟 Negative sentiment — provide farmer support"
    if row['avg_demand'] > demand_threshold:
        return "📈 High demand — plan supply chain"
    if row['avg_rainfall'] < rainfall_threshold:
        return "💧 Low rainfall — irrigation advisory"
    return "✅ Stable region"

# Evaluate the rule chain once per region (axis=1 passes each row to the function).
recommendations = region_summary.apply(recommendation, axis=1)
region_summary['Recommendation'] = recommendations

# Average sentiment per region, coloured by the advisory assigned to it.
fig = px.bar(
    region_summary,
    x='Region',
    y='avg_sentiment',
    color='Recommendation',
    title='Regional Sentiment & Recommendations',
)
fig.show()

# Final table shown as the cell's output.
region_summary.loc[:, ['Region', 'Recommendation']]
Out[28]:
Region Recommendation
0 Andhra Pradesh 🚨 High pest risk — monitor immediately
1 Gujarat 🚨 High pest risk — monitor immediately
2 Karnataka 🚨 High pest risk — monitor immediately
3 Maharashtra 🚨 High pest risk — monitor immediately
4 Punjab 🚨 High pest risk — monitor immediately
5 Tamil Nadu 🚨 High pest risk — monitor immediately
6 Telangana 🚨 High pest risk — monitor immediately
In [ ]: